In [1]:
import pandas as pd
import seaborn as sns
file = "FY_2024_Hospital_Readmissions_Reduction_Program_Hospital (1).csv"
data = pd.read_csv(file)
In [ ]:
data['Number of Discharges'].fillna(data['Number of Discharges'].median(), inplace=True)
In [ ]:
data['Number of Readmissions'] = pd.to_numeric(data['Number of Readmissions'], errors='coerce')
readmissions_by_diagnosis = data.groupby('Measure Name')['Number of Readmissions'].sum()
readmissions_by_diagnosis_sorted = readmissions_by_diagnosis.sort_values(ascending=False)
top_5_diagnoses = readmissions_by_diagnosis_sorted.head(5)
for diagnosis, readmissions in top_5_diagnoses.items():
    print(f"{diagnosis}: {int(readmissions):,} readmissions.")
READM-30-HF-HRRP: 148,213 readmissions.
READM-30-PN-HRRP: 101,169 readmissions.
READM-30-COPD-HRRP: 39,275 readmissions.
READM-30-AMI-HRRP: 33,269 readmissions.
READM-30-HIP-KNEE-HRRP: 6,721 readmissions.

The above reads: Heart Failure, Pneuomnia, Chronic Obstructive Pulmonary Disease Care, Heart Attack Care, and Total Hip/Knee Arthroplasty in order from top to bottom.

In [ ]:
data.describe()
data[['Excess Readmission Ratio', 'Predicted Readmission Rate', 'Number of Readmissions']].info()
In [ ]:
correlation_matrix = data[['Excess Readmission Ratio', 'Predicted Readmission Rate', 'Number of Readmissions']].corr()
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
Out[ ]:
<Axes: >
No description has been provided for this image
In [2]:
import plotly.express as px

columns_needed = ['State', 'Measure Name', 'Number of Readmissions']
data_cleaned = data[columns_needed]

data_cleaned['Number of Readmissions'] = pd.to_numeric(data_cleaned['Number of Readmissions'], errors='coerce')

state_agg = data_cleaned.groupby(['State', 'Measure Name'], as_index=False).sum()

state_agg['Rank'] = state_agg.groupby('State')['Number of Readmissions'].rank(method='max', ascending=False)
prevalent_cause = state_agg[state_agg['Rank'] == 1]

map_data = prevalent_cause[['State', 'Measure Name', 'Number of Readmissions']]
map_data.rename(columns={
    'Measure Name': 'Most Prevalent Cause',
    'Number of Readmissions': 'Total Readmissions'
}, inplace=True)

state_codes = {
    'AL': 'Alabama', 'AK': 'Alaska', 'AZ': 'Arizona', 'AR': 'Arkansas',
    'CA': 'California', 'CO': 'Colorado', 'CT': 'Connecticut', 'DE': 'Delaware',
    'FL': 'Florida', 'GA': 'Georgia', 'HI': 'Hawaii', 'ID': 'Idaho',
    'IL': 'Illinois', 'IN': 'Indiana', 'IA': 'Iowa', 'KS': 'Kansas',
    'KY': 'Kentucky', 'LA': 'Louisiana', 'ME': 'Maine', 'MD': 'Maryland',
    'MA': 'Massachusetts', 'MI': 'Michigan', 'MN': 'Minnesota', 'MS': 'Mississippi',
    'MO': 'Missouri', 'MT': 'Montana', 'NE': 'Nebraska', 'NV': 'Nevada',
    'NH': 'New Hampshire', 'NJ': 'New Jersey', 'NM': 'New Mexico', 'NY': 'New York',
    'NC': 'North Carolina', 'ND': 'North Dakota', 'OH': 'Ohio', 'OK': 'Oklahoma',
    'OR': 'Oregon', 'PA': 'Pennsylvania', 'RI': 'Rhode Island', 'SC': 'South Carolina',
    'SD': 'South Dakota', 'TN': 'Tennessee', 'TX': 'Texas', 'UT': 'Utah',
    'VT': 'Vermont', 'VA': 'Virginia', 'WA': 'Washington', 'WV': 'West Virginia',
    'WI': 'Wisconsin', 'WY': 'Wyoming', 'DC': 'District of Columbia'
}
map_data['State Name'] = map_data['State'].map(state_codes)

regions = {
    'Midwest': [
        'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Michigan', 'Minnesota',
        'Missouri', 'Nebraska', 'North Dakota', 'Ohio', 'South Dakota', 'Wisconsin'
    ],
    'West': [
        'Alaska', 'California', 'Colorado', 'Hawaii', 'Idaho', 'Montana',
        'Nevada', 'Oregon', 'Utah', 'Washington', 'Wyoming'
    ],
    'Southeast': [
        'Alabama', 'Arkansas', 'Florida', 'Georgia', 'Kentucky', 'Louisiana',
        'Mississippi', 'North Carolina', 'South Carolina', 'Tennessee', 'Virginia',
        'West Virginia'
    ],
    'Southwest': [
        'Arizona', 'New Mexico', 'Oklahoma', 'Texas'
    ],
    'Northeast': [
        'Connecticut', 'Delaware', 'Maine', 'Maryland', 'Massachusetts',
        'New Hampshire', 'New Jersey', 'New York', 'Pennsylvania',
        'Rhode Island', 'Vermont', 'District of Columbia'
    ]
}

def assign_region(state_name):
    for region, states in regions.items():
        if state_name in states:
            return region
    return 'Unknown'

map_data['Region'] = map_data['State Name'].apply(assign_region)

fig = px.choropleth(
    map_data,
    locations='State',
    locationmode="USA-states",
    color='Most Prevalent Cause',
    hover_name='State Name',
    hover_data={
        'Total Readmissions': True,
        'Most Prevalent Cause': True,
        'Region': True,
        'State': False,
    },
    scope="usa",
    title="Most Prevalent Causes of Readmission by State with Regions",
    labels={'Most Prevalent Cause': 'Prevalent Cause'}
)

fig.update_layout(geo=dict(bgcolor='rgba(0,0,0,0)'))

fig.show()
C:\Users\joeth\AppData\Local\Temp\ipykernel_27456\2662286663.py:6: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_cleaned['Number of Readmissions'] = pd.to_numeric(data_cleaned['Number of Readmissions'], errors='coerce')
C:\Users\joeth\AppData\Local\Temp\ipykernel_27456\2662286663.py:14: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  map_data.rename(columns={
In [3]:
columns_needed = ['Facility Name', 'State', 'Measure Name', 'Predicted Readmission Rate', 'Expected Readmission Rate']
scatter_data = data[columns_needed].dropna()

scatter_data['Predicted Readmission Rate'] = pd.to_numeric(scatter_data['Predicted Readmission Rate'], errors='coerce')
scatter_data['Expected Readmission Rate'] = pd.to_numeric(scatter_data['Expected Readmission Rate'], errors='coerce')

state_codes = {
    'AL': 'Alabama', 'AK': 'Alaska', 'AZ': 'Arizona', 'AR': 'Arkansas',
    'CA': 'California', 'CO': 'Colorado', 'CT': 'Connecticut', 'DE': 'Delaware',
    'FL': 'Florida', 'GA': 'Georgia', 'HI': 'Hawaii', 'ID': 'Idaho',
    'IL': 'Illinois', 'IN': 'Indiana', 'IA': 'Iowa', 'KS': 'Kansas',
    'KY': 'Kentucky', 'LA': 'Louisiana', 'ME': 'Maine', 'MD': 'Maryland',
    'MA': 'Massachusetts', 'MI': 'Michigan', 'MN': 'Minnesota', 'MS': 'Mississippi',
    'MO': 'Missouri', 'MT': 'Montana', 'NE': 'Nebraska', 'NV': 'Nevada',
    'NH': 'New Hampshire', 'NJ': 'New Jersey', 'NM': 'New Mexico', 'NY': 'New York',
    'NC': 'North Carolina', 'ND': 'North Dakota', 'OH': 'Ohio', 'OK': 'Oklahoma',
    'OR': 'Oregon', 'PA': 'Pennsylvania', 'RI': 'Rhode Island', 'SC': 'South Carolina',
    'SD': 'South Dakota', 'TN': 'Tennessee', 'TX': 'Texas', 'UT': 'Utah',
    'VT': 'Vermont', 'VA': 'Virginia', 'WA': 'Washington', 'WV': 'West Virginia',
    'WI': 'Wisconsin', 'WY': 'Wyoming', 'DC': 'District of Columbia'
}
scatter_data['State Name'] = scatter_data['State'].map(state_codes)

regions = {
    'Midwest': [
        'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Michigan', 'Minnesota',
        'Missouri', 'Nebraska', 'North Dakota', 'Ohio', 'South Dakota', 'Wisconsin'
    ],
    'West': [
        'Alaska', 'California', 'Colorado', 'Hawaii', 'Idaho', 'Montana',
        'Nevada', 'Oregon', 'Utah', 'Washington', 'Wyoming'
    ],
    'Southeast': [
        'Alabama', 'Arkansas', 'Florida', 'Georgia', 'Kentucky', 'Louisiana',
        'Mississippi', 'North Carolina', 'South Carolina', 'Tennessee', 'Virginia',
        'West Virginia'
    ],
    'Southwest': [
        'Arizona', 'New Mexico', 'Oklahoma', 'Texas'
    ],
    'Northeast': [
        'Connecticut', 'Delaware', 'Maine', 'Maryland', 'Massachusetts',
        'New Hampshire', 'New Jersey', 'New York', 'Pennsylvania',
        'Rhode Island', 'Vermont', 'District of Columbia'
    ]
}

def assign_region(state_name):
    for region, states in regions.items():
        if state_name in states:
            return region
    return 'Unknown'

scatter_data['Region'] = scatter_data['State Name'].apply(assign_region)

measure_name_mapping = {
    'READM-30-HF-HRRP': 'Heart Failure',
    'READM-30-PN-HRRP': 'Pneumonia',
    'READM-30-COPD-HRRP': 'Chronic Obstructive Pulmonary Disease',
    'READM-30-AMI-HRRP': 'Acute Myocardial Infarction',
    'READM-30-HIP-KNEE-HRRP': 'Hip and Knee Replacements',
    'READM-30-CABG-HRRP': 'Coronary Artery Bypass Graft'
}

scatter_data['Measure Name'] = scatter_data['Measure Name'].replace(measure_name_mapping)

def create_region_scatter(region_name):
    region_data = scatter_data[scatter_data['Region'] == region_name]

    if region_data.empty:
        print(f"No data available for region: {region_name}")
        return

    fig = px.scatter(
        region_data,
        x='Predicted Readmission Rate',
        y='Expected Readmission Rate',
        color='Measure Name',
        hover_name='Facility Name',
        hover_data={
            'State Name': True,
            'Measure Name': True,
            'Predicted Readmission Rate': True,
            'Expected Readmission Rate': True,
        },
        title=f"{region_name} - Predicted vs. Expected Readmission Rates",
        labels={
            'Predicted Readmission Rate': 'Predicted Rate (%)',
            'Expected Readmission Rate': 'Expected Rate (%)',
            'Measure Name': 'Condition'
        }
    )
    fig.update_traces(marker=dict(size=10, opacity=0.7))
    fig.update_layout(
        xaxis_title='Predicted Readmission Rate (%)',
        yaxis_title='Expected Readmission Rate (%)',
        legend_title='Condition',
        plot_bgcolor='rgba(0,0,0,0)',
        paper_bgcolor='rgba(255,255,255,1)'
    )
    fig.show()

for region in regions.keys():
    create_region_scatter(region)